{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "bd8b8fdc-8691-4578-8e88-0b29ca6a43ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "import json\n",
    "import numpy as np\n",
    "\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "\n",
    "from transformers import (AutoTokenizer,\n",
    "                          AutoModel,\n",
    "                          AutoConfig,\n",
    "                          AutoModelForMaskedLM,\n",
    "                          AdamW,\n",
    "                          BertTokenizer,\n",
    "                          BertModel,\n",
    "                          get_scheduler,\n",
    "                          set_seed,\n",
    "                          BertPreTrainedModel,\n",
    "                          LongformerConfig,\n",
    "                          LongformerModel,\n",
    "                          LongformerTokenizer,\n",
    "\n",
    "                         )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "dfff5e6c-705f-4d57-902f-1b356d0d480d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def _read_pickle(path):\n",
    "    \"\"\"Open ``path`` in binary mode and return the object unpickled from it.\"\"\"\n",
    "    # pickle can execute arbitrary code while loading; only point this at trusted files.\n",
    "    with open(path, 'rb') as handle:\n",
    "        return pickle.load(handle)\n",
    "\n",
    "\n",
    "# Three pickled datasets; judging by the file names these are presumably the\n",
    "# train, test and validation splits -- confirm against the data pipeline.\n",
    "dataPath1 = '../pheno_24/trainp2x_data.pkl'\n",
    "dataPath2 = '../pheno_24/testp2x_data.pkl'\n",
    "dataPath3 = '../pheno_24/valp2x_data.pkl'\n",
    "\n",
    "data1 = _read_pickle(dataPath1)\n",
    "data2 = _read_pickle(dataPath2)\n",
    "data3 = _read_pickle(dataPath3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "44b41d25-e7e8-45c4-ab4f-aad28e1b3364",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tally the samples of data1 by label: count1 counts label == 1, count0 everything else.\n",
    "count1 = sum(1 for sample in data1 if sample['label'] == 1)\n",
    "count0 = len(data1) - count1\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "29bb7729-48a4-4ad3-bce5-ecc1975ed8cd",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Token indices sequence length is longer than the specified maximum sequence length for this model (5596 > 4096). Running this sequence through the model will result in indexing errors\n"
     ]
    }
   ],
   "source": [
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"yikuan8/Clinical-Longformer\")\n",
    "\n",
    "# text_len: token count (special tokens included) of every text, flattened over all samples.\n",
    "# no_note: number of texts in each sample's 'text_data', in dataset order.\n",
    "text_len = []\n",
    "no_note = []\n",
    "for sample in data1:\n",
    "    sample_texts = sample['text_data']\n",
    "    for text in sample_texts:\n",
    "        encoded = tokenizer.encode_plus(text, add_special_tokens=True)\n",
    "        text_len.append(len(encoded['input_ids']))\n",
    "    no_note.append(len(sample_texts))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "59ee4eba-39ba-41df-930d-d075f5741e56",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10.293705860488167"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Mean number of texts per sample. The divisor is taken from no_note itself\n",
    "# instead of a hard-coded sample count, so it stays correct if the dataset changes.\n",
    "sum(no_note)/len(no_note)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "a232dea1-d64d-4e46-9126-1d5432ac91c1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.07697353771342252"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fraction of samples that carry more than 5 texts.\n",
    "# `count` is a number of samples, so it is divided by the number of samples\n",
    "# (len(no_note)) rather than by the total number of texts (len(text_len)).\n",
    "count = sum(1 for n_texts in no_note if n_texts > 5)\n",
    "count/len(no_note)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "eec2873b-2bc4-477a-a5f4-86a8e6ed1a85",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.0, 4450.0)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEGCAYAAACUzrmNAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAc00lEQVR4nO3dfZRddX3v8feH8KgiCUJzp0naBEnFWPGQjgEHbougIWBrxl7whmVLLnKN2tDK7cMQrEtGkC5pL0W4CykRosGKIYKEXJoWwqM3txcSAoeQgJGRh5IYiBIejA/BwPf+sX8TNsPM7DPM7PMw83mtddbZ+7t/e5/vj3XId/Zv77N/igjMzMwGs1ejEzAzs+bnYmFmZoVcLMzMrJCLhZmZFXKxMDOzQns3OoEyHHLIITF16tRGp2Fm1lLWr1//04g4tL9to7JYTJ06lfvvv7/RaZiZtRRJTw20zcNQb0Jn55pGp2BmVlcuFmZmVsjFwszMCrlYmJlZIRcLMzMr5GJhZmaFXCzMzKyQi4WZmRVysTAzs0IuFmZmVsjFYhj8S24zGytcLMzMrJCLhZmZFXKxMDOzQi4WZmZWyMXCzMwKuViYmVmh0ouFpHGSHpR0S1qfJuk+ST2Srpe0b4rvl9Z70vapuWOcl+KbJZ1Uds5mZvZ69Tiz+BzwaG79YuDSiDgceB44K8XPAp5P8UtTOyTNAOYB7wHmAF+TNK4OeZuZWVJqsZA0GfgIcHVaF3ACcENqshToTMtz0zpp+4mp/VxgWUTsiogngB5gVpl5m5nZ6+1d8vG/CnQBB6b1dwAvRMTutL4FmJSWJwFPA0TEbkkvpvaTgHtzx8zvs4ekBcACgLa2NqrV6kj243U6OnZSrVb3vJuZjXalFQtJfwhsj4j1ko4v63N6RcRiYDFAe3t7VCqV0j6ru3sNXV2VPe9mZqNdmWcWxwIflXQKsD/wduAyYLykvdPZxWRga2q/FZgCbJG0N3AQ8Fwu3iu/j5mZ1UFp1ywi4ryImBwRU8kuUN8ZEZ8A7gJOTc3mAzen5ZVpnbT9zoiIFJ+X7paaBkwH1paVt5mZvVHZ1yz6cy6wTNKXgQeBa1L8GuBbknqAHWQFhojYJGk58AiwG1gYEa/UP20zs7GrLsUiIu4G7k7Lj9PP3UwR8SvgtAH2vwi4qLwMzcxsMP4Ft5mZFXKxeJM88ZGZjSUuFkPkImFmY5GLhZmZFXKxGCafaZjZWOBiYWZmhVwszMyskIuFmZkVcrEwM7NCLhZmZlbIxcLMzAq5WJiZWSEXCzMzK+RiYWZmhVwszMyskIuFmZkVKq1YSNpf0lpJD0naJOlLKf5NSU9IqqZXJcUl6XJJPZI2SJqZO9Z8SY+l1/wBPtLMzEpS5kx5u4ATImKnpH2ANZL+NW37m4i4oU/7k8nm154OHA1cCRwt6WDgfKAdCGC9pJUR8XyJuZuZWU5pZxaR2ZlW90mvGGSXucC1ab97gfGS2oCTgNURsSMViNXAnLLyNjOzNyp1Dm5J44D1wOHAFRFxn6TPAhdJ+iJwB7AoInYBk4Cnc7tvSbGB4n0/awGwAKCtrY1qtTryHQI6Ona+IVbWZ5mZNYtSi0VEvAJUJI0HbpL0u8B5wDPAvsBi4FzgghH4rMXpeLS3t0elUhnuIfvV3f3G+Su6usr5LDOzZlGXu6Ei4gXgLmBORGxLQ027gG8As1KzrcCU3G6TU2yguJmZ1UmZd0Mdms4okHQA8GHgB+k6BJIEdAIb0y4rgTPSXVHHAC9GxDbgVmC2pAmSJgCzU6zuPCuemY1VZQ5DtQFL03WLvYDlEXGLpDslHQoIqAKfSe1XAacAPcAvgDMBImKHpAuBdandBRGxo8S8zcysj9KKRURsAI7qJ37CAO0DWDjAtiXAkhFN0MzMauZfcJuZWSEXCzMzK+RiYWZmhVwszMyskIuFmZkVcrEwM7NCLhZmZlbIxcLMzAq5WJiZWSEX
CzMzK+RiYWZmhVwszMyskIuFmZkVcrEwM7NCLhZmZlaozJny9pe0VtJDkjZJ+lKKT5N0n6QeSddL2jfF90vrPWn71NyxzkvxzZJOKitnMzPrX5lnFruAEyLifUAFmJOmS70YuDQiDgeeB85K7c8Cnk/xS1M7JM0A5gHvAeYAX0uz75mZWZ2UViwiszOt7pNeAZwA3JDiS8nm4QaYm9ZJ209M83TPBZZFxK6IeIJs2tVZZeVtZmZvVOYc3KQzgPXA4cAVwI+AFyJid2qyBZiUlicBTwNExG5JLwLvSPF7c4fN75P/rAXAAoC2tjaq1epId4eOjp39xsv4LDOzZlJqsYiIV4CKpPHATcARJX7WYmAxQHt7e1QqlRH/jO7uNf3Gu7pG/rPMzJpJXe6GiogXgLuADwDjJfUWqcnA1rS8FZgCkLYfBDyXj/ezT9Po7Oy/kJiZjQZl3g11aDqjQNIBwIeBR8mKxqmp2Xzg5rS8Mq2Ttt8ZEZHi89LdUtOA6cDasvLOcwEwM8uUOQzVBixN1y32ApZHxC2SHgGWSfoy8CBwTWp/DfAtST3ADrI7oIiITZKWA48Au4GFaXjLzMzqpLRiEREbgKP6iT9OP3czRcSvgNMGONZFwEUjnaOZmdXGv+A2M7NCLhZmZlbIxWIE+EK4mY12LhY1ckEws7HMxcLMzAq5WJiZWSEXixHkoSozG61cLMzMrJCLhZmZFaqpWEh6b9mJmJlZ86r1zOJraYrUP5N0UKkZmZlZ06mpWETEfwY+Qfao8PWSrpP04VIzMzOzplHzNYuIeAz4AnAu8AfA5ZJ+IOmPy0rOzMyaQ63XLI6UdCnZfBQnAH8UEe9Oy5eWmJ+ZmTWBWh9R/r+Aq4HPR8Qve4MR8WNJXyglMzMzaxq1DkN9BLiut1BI2kvSWwAi4lv97SBpiqS7JD0iaZOkz6V4t6StkqrpdUpun/Mk9UjaLOmkXHxOivVIWvRmO2tmZm9OrWcWtwMfAnam9bcAtwEdg+yzG/iriHhA0oFkF8ZXp22XRsT/zDeWNINsdrz3AL8J3C7pd9LmK8imZd0CrJO0MiIeqTF3MzMbplqLxf4R0VsoiIidvWcWA4mIbcC2tPwzSY8CkwbZZS6wLCJ2AU+k6VV7Z9TrSTPsIWlZautiYWZWJ7UOQ/1c0szeFUm/B/xykPavI2kq2RSr96XQ2ZI2SFoiaUKKTQKezu22JcUGipuZWZ3UemZxDvBdST8GBPwn4L/WsqOktwE3AudExEuSrgQuBCK9XwJ8coh59/c5C4AFAG1tbVSr1eEeko6OnXuO09Gxc/DGyUh8rplZs6mpWETEOklHAO9Koc0R8eui/STtQ1Yovh0R30vHeja3/evALWl1K9mP/npNTjEGiedzXAwsBmhvb49KpVLcsQLd3Wvo6qrsWa5Fb3szs9FkKA8SfD9wJDATOF3SGYM1liTgGuDRiPjHXLwt1+xjwMa0vBKYJ2k/SdOA6cBaYB0wXdI0SfuSXQRfOYS8zcxsmGo6s5D0LeCdQBV4JYUDuHaQ3Y4F/hR4WFI1xT5PVmgqaf8ngU8DRMQmScvJLlzvBhZGxCvp888GbgXGAUsiYlNNvTMzsxFR6zWLdmBGREStB46INWTXN/paNcg+FwEX9RNfNdh+ZmZWrlqHoTaSXdQ2M7MxqNYzi0OARyStBXb1BiPio6VkZWZmTaXWYtFdZhJmZtbcar119h5Jvw1Mj4jb06+3x5WbmpmZNYtaH1H+KeAG4KoUmgSsKCknMzNrMrVe4F5IdivsS7BnIqTfKCspMzNrLrUWi10R8XLviqS9yX4nYWZmY0CtxeIeSZ8HDkhzb38X+N/lpWVmZs2k1mKxCPgJ8DDZL65Xkc3HPSZ0dtb2XCgzs9Gq1ruhXgW+nl5mZjbG1PpsqCfo5xpFRBw24hmZmVnTGcqzoXrtD5wGHDzy6ZiZWTOq6ZpFRDyXe22NiK8CHyk3NTMzaxa1DkPNzK3uRXamUetZ
iZmZtbha/8G/JLe8m2weio+PeDZmZtaUar0b6oNlJzKadHauYcWK4xqdhpnZiKl1GOovB9uenzY1t88Uspn0JpLdSbU4Ii6TdDBwPTCVdIYSEc+naVgvA04BfgH8t4h4IB1rPq/9ruPLEbG0lrwbwb/JMLPRqNYf5bUDnyV7gOAk4DNkc3EfmF792Q38VUTMAI4BFkqaQfYDvzsiYjpwR1oHOJls3u3pwALgSoBUXM4HjgZmAedLmjCEPjaEi4aZjSa1XrOYDMyMiJ8BSOoG/iUi/mSgHSJiG7AtLf9M0qNkhWYucHxqthS4Gzg3xa9NU7feK2m8pLbUdnVE7EifvRqYA3yn5l6amdmw1FosJgIv59ZfTrGaSJoKHAXcB0xMhQTgmdxxJgFP53bbwmtnMv3F+37GArIzEtra2qhWq7Wm16/bbnuGjg6oVqt0dOx8U8cYbg5mZs2i1mJxLbBW0k1pvZPsrKCQpLcBNwLnRMRL2aWJTESEpBF5em1ELAYWA7S3t0elUhnW8bq7s2Gkrq7KnuWh6uoaXg5mZs2i1h/lXQScCTyfXmdGxN8V7SdpH7JC8e2I+F4KP5uGl0jv21N8KzAlt/vkFBsobmZmdVLrBW6AtwAvRcRlwBZJ0wZrnO5uugZ4tM/dUiuB+Wl5PnBzLn6GMscAL6bhqluB2ZImpAvbs1PMzMzqpNZbZ88nuyPqXcA3gH2AfyabPW8gxwJ/CjwsqZpinwe+AiyXdBbwFK/9uG8V2W2zPWS3zp4JEBE7JF0IrEvtLui92G1mZvVR6zWLj5FdoH4AICJ+LGmgW2ZJbdYAGmDzif20D7LpW/s71hJgSY25mpnZCKt1GOrl9I95AEh6a3kpmZlZs6m1WCyXdBUwXtKngNvxREhmZmNG4TBUulB9PXAE8BLZdYsvRsTqknMzM7MmUVgs0m8hVkXEewEXCDOzMajWYagHJL2/1EzMzKxp1Xo31NHAn0h6Evg52V1OERFHlpWYmZk1j0GLhaTfioj/AE6qUz6jiue1MLPRoujMYgXZ02afknRjRPyXOuRkZmZNpuiaRf5HdYeVmYiZmTWvomIRAyybmdkYUjQM9T5JL5GdYRyQluG1C9xvLzU7MzNrCoMWi4gYV69EzMyseQ3lEeVmZjZGuViYmVkhFwszMytUWrGQtETSdkkbc7FuSVslVdPrlNy28yT1SNos6aRcfE6K9UhaVFa+ZensfHPzd5uZNZMyzyy+CczpJ35pRFTSaxWApBnAPOA9aZ+vSRonaRxwBXAyMAM4PbU1M7M6qvXZUEMWEd+XNLXG5nOBZRGxC3hCUg8wK23riYjHASQtS20fGel8zcxsYI24ZnG2pA1pmGpCik0Cns612ZJiA8XNzKyOSjuzGMCVwIVkvwa/ELgE+ORIHFjSAmABQFtbG9VqdVjH6+jYCUC1Wt2z/GYNNxczs0ara7GIiGd7lyV9HbglrW4FpuSaTk4xBon3PfZiYDFAe3t7VCqVYeXa3Z1dmO7qquxZfrO6uoaXi5lZo9V1GEpSW271Y0DvnVIrgXmS9pM0DZgOrAXWAdMlTZO0L9lF8JX1zHkk+I4oM2t1pZ1ZSPoOcDxwiKQtwPnA8ZIqZMNQTwKfBoiITZKWk1243g0sjIhX0nHOBm4FxgFLImJTWTmbmVn/yrwb6vR+wtcM0v4i4KJ+4quAVSOYmpmZDZF/wW1mZoVcLMzMrJCLhZmZFXKxMDOzQi4WZmZWyMXCzMwKuViYmVkhF4s68a+4zayVuViYmVkhFwszMyvkYlFHHooys1blYmFmZoVcLMzMrJCLhZmZFXKxqDNftzCzVuRiYWZmhUorFpKWSNouaWMudrCk1ZIeS+8TUlySLpfUI2mDpJm5fean9o9Jml9WvmZmNrAyzyy+CczpE1sE3BER04E70jrAyWTzbk8HFgBXQlZcyKZjPRqYBZzfW2DMzKx+SisWEfF9YEef8FxgaVpeCnTm4tdG5l5gvKQ24CRg
dUTsiIjngdW8sQCZmVnJSpuDewATI2JbWn4GmJiWJwFP59ptSbGB4m8gaQHZWQltbW1Uq9VhJdrRsROAarW6Z3mkDDc3M7N6q3ex2CMiQlKM4PEWA4sB2tvbo1KpDOt43d3ZXUtdXZU9yyPl3/99JytWHDeixzQzK1O974Z6Ng0vkd63p/hWYEqu3eQUGyhuZmZ1VO9isRLovaNpPnBzLn5GuivqGODFNFx1KzBb0oR0YXt2ipmZWR2VNgwl6TvA8cAhkraQ3dX0FWC5pLOAp4CPp+argFOAHuAXwJkAEbFD0oXAutTugojoe9HczMxKVlqxiIjTB9h0Yj9tA1g4wHGWAEtGMLWm0Nm5xtctzKxl+BfcZmZWyMXCzMwKuVg0kB8qaGatwsWiwVwwzKwVuFiYmVkhFwszMyvkYmFmZoVcLJqAr1uYWbNzsTAzs0IuFmZmVsjFwszMCrlYNBFfuzCzZuViYWZmhVwszMyskItFk/FQlJk1o4YUC0lPSnpYUlXS/Sl2sKTVkh5L7xNSXJIul9QjaYOkmY3I2cxsLGvkmcUHI6ISEe1pfRFwR0RMB+5I6wAnA9PTawFwZd0zNTMb45ppGGousDQtLwU6c/FrI3MvMF5SWwPyMzMbs0qbVrVAALdJCuCqiFgMTIyIbWn7M8DEtDwJeDq375YU25aLIWkB2ZkHbW1tVKvVYSXY0bETgGq1ume5TPnPGW7uZmYjrVHF4riI2CrpN4DVkn6Q3xgRkQpJzVLBWQzQ3t4elUplWAl2d2cXmru6KnuWy5T/nK6uiufoNrOm0pBhqIjYmt63AzcBs4Bne4eX0vv21HwrMCW3++QUG/V8Z5SZNYu6FwtJb5V0YO8yMBvYCKwE5qdm84Gb0/JK4Ix0V9QxwIu54SozM6uDRgxDTQRuktT7+ddFxL9JWgcsl3QW8BTw8dR+FXAK0AP8Ajiz/imbmY1tdS8WEfE48L5+4s8BJ/YTD2BhHVIzM7MBNNOtszYAX7sws0ZzsWgRLhhm1kguFmZmVsjFosX4DMPMGsHFwszMCrlYtCCfXZhZvblYmJlZIRcLMzMr5GLRwjwcZWb14mJhZmaFXCxanM8uzKweXCxGARcMMyubi4WZmRVysRglfHZhZmVq1LSqVoJ8wfCUrGY2klwsRikXDjMbSS0zDCVpjqTNknokLWp0Pq2ks3PNnuLh4SozezNaolhIGgdcAZwMzABOlzSjsVm1nnzB6G/ZzGwgrTIMNQvoSVOyImkZMBd4pKFZtbh8kRioYKxYcdyebX2Xe/frO8yVj9U6HNbfccysebRKsZgEPJ1b3wIcnW8gaQGwIK3ulLR5GJ93CPDT7LjDOMoQ5D9nqMsD2NOHeuY11FgN20ekH03A/Wgeo6EPUE4/fnugDa1SLApFxGJg8UgcS9L9EdE+EsdqlNHQB3A/ms1o6Mdo6APUvx8tcc0C2ApMya1PTjEzM6uDVikW64DpkqZJ2heYB6xscE5mZmNGSwxDRcRuSWcDtwLjgCURsanEjxyR4awGGw19APej2YyGfoyGPkCd+6GIqOfnmZlZC2qVYSgzM2sgFwszMyvkYpHT7I8UkbRE0nZJG3OxgyWtlvRYep+Q4pJ0eerLBkkzc/vMT+0fkzS/zn2YIukuSY9I2iTpcy3aj/0lrZX0UOrHl1J8mqT7Ur7XpxsykLRfWu9J26fmjnVeim+WdFI9+5HLYZykByXd0qr9kPSkpIclVSXdn2Kt9r0aL+kGST+Q9KikDzRNHyLCr+y6zTjgR8BhwL7AQ8CMRufVJ8ffB2YCG3OxvwcWpeVFwMVp+RTgXwEBxwD3pfjBwOPpfUJanlDHPrQBM9PygcAPyR7h0mr9EPC2tLwPcF/KbzkwL8X/CfhsWv4z4J/S8jzg+rQ8I33X9gOmpe/guAZ8t/4SuA64Ja23XD+AJ4FD+sRa7Xu1FPjvaXlfYHyz9KGuX8hmfgEfAG7NrZ8HnNfovPrJcyqvLxabgba0
3AZsTstXAaf3bQecDlyVi7+uXQP6czPw4VbuB/AW4AGypwr8FNi773eK7E6+D6TlvVM79f2e5dvVMf/JwB3ACcAtKa9W7MeTvLFYtMz3CjgIeIJ041Gz9cHDUK/p75EikxqUy1BMjIhtafkZYGJaHqg/TdPPNIRxFNlf5S3XjzR0UwW2A6vJ/pp+ISJ295PTnnzT9heBd9AE/QC+CnQBr6b1d9Ca/QjgNknrlT3+B1rrezUN+AnwjTQkeLWkt9IkfXCxGEUi+zOiJe6FlvQ24EbgnIh4Kb+tVfoREa9ERIXsL/NZwBGNzWjoJP0hsD0i1jc6lxFwXETMJHs69UJJv5/f2ALfq73JhpmvjIijgJ+TDTvt0cg+uFi8plUfKfKspDaA9L49xQfqT8P7KWkfskLx7Yj4Xgq3XD96RcQLwF1kwzXjJfX+2DWf05580/aDgOdofD+OBT4q6UlgGdlQ1GW0Xj+IiK3pfTtwE1kBb6Xv1RZgS0Tcl9ZvICseTdEHF4vXtOojRVYCvXc7zCe7BtAbPyPdMXEM8GI6lb0VmC1pQrqrYnaK1YUkAdcAj0bEP+Y2tVo/DpU0Pi0fQHbd5VGyonHqAP3o7d+pwJ3pr8SVwLx0l9E0YDqwti6dACLivIiYHBFTyb7zd0bEJ2ixfkh6q6QDe5fJvg8baaHvVUQ8Azwt6V0pdCLZNAzN0Yd6XoBq9hfZ3QU/JBt7/ttG59NPft8BtgG/Jvsr5Cyy8eI7gMeA24GDU1uRTRj1I+BhoD13nE8CPel1Zp37cBzZafQGoJpep7RgP44EHkz92Ah8McUPI/tHsgf4LrBfiu+f1nvS9sNyx/rb1L/NwMkN/H4dz2t3Q7VUP1K+D6XXpt7/f1vwe1UB7k/fqxVkdzM1RR/8uA8zMyvkYSgzMyvkYmFmZoVcLMzMrJCLhZmZFXKxMDOzQi4WNqpICkmX5Nb/WlL3CB37m5JOLW457M85LT1x9K5c7L3paapVSTskPZGWbx/gGDvLztPGFhcLG212AX8s6ZBGJ5KX+zV0Lc4CPhURH+wNRMTDEVGJ7PEiK4G/SesfGuFUzfrlYmGjzW6yuYn/R98Nfc8Mev/6lnS8pHsk3SzpcUlfkfQJZfNVPCzpnbnDfEjS/ZJ+mJ6r1PtAwX+QtC7NK/Dp3HH/j6SVZL/E7ZvP6en4GyVdnGJfJPvh4jWS/qGos/0do8/2QyT9P0kfSb86vzHluU7SsalNt7K5Uu5O/f+Los+1sWcof+2YtYorgA2S/n4I+7wPeDewg+z5/1dHxCxlkzP9OXBOajeV7JlD7wTuknQ4cAbZoxbeL2k/4P9Kui21nwn8bkQ8kf8wSb8JXAz8HvA82dNSOyPiAkknAH8dEfcPlvAgx1iRtk8kOwv5QkSslnQdcGlErJH0W2SPgHh3OtwRwAfJ5hjZLOnKiPj1EP772SjnYmGjTkS8JOla4C+AX9a427pIj4GW9COg9x/7h8n+Ee21PCJeBR6T9DjZP7KzgSNzZy0HkT0b6WVgbd9CkbwfuDsifpI+89tkk1utqDHfomPsQ/aIiIURcU9q/yFgRvZ4LgDeruzpvwD/EhG7gF2StpM9BnvLEHKxUc7Fwkarr5JNSPSNXGw3aehV0l5kM5H12pVbfjW3/iqv//+k7/NxguwZPX8eEa97WJuk48keM90Iu4H1wElAb7HYCzgmIn6Vb5iKR77/r+B/G6wPX7OwUSkidpBNDXpWLvwk2ZANwEfJ/voeqtMk7ZWuYxxG9tC8W4HPKnv0OpJ+Jz35dDBrgT9I1xTGkc1udk/BPkM5RpA9TO4ISeem2G1kQ2qkPCtD/Dwbw/zXg41mlwBn59a/Dtws6SHg33hzf/X/B9k/0m8HPhMRv5J0Ndm1jAeU/Zn+E6BzsINExDZJi8geBS6yYaCbB9tnqMeIiFcknQ6slPQzsmG5KyRtIPt///vAZ4bymTZ2+amzZmZWyMNQZmZWyMXCzMwKuViYmVkhFwsz
MyvkYmFmZoVcLMzMrJCLhZmZFfr/RmRGvUrIgJcAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Histogram of the per-text token counts; bin edges are chosen automatically.\n",
    "fig, ax = plt.subplots()\n",
    "n, bins, patches = ax.hist(x=text_len, bins='auto', color='#0504aa',\n",
    "                           alpha=0.7, rwidth=0.85)\n",
    "ax.grid(axis='y', alpha=0.75)\n",
    "ax.set_xlabel('Number of Token')\n",
    "ax.set_ylabel('Frequency')\n",
    "ax.set_title('')\n",
    "\n",
    "# Upper y-limit: round the tallest bin up to the next multiple of 10,\n",
    "# or add 10 when it already is an exact multiple.\n",
    "maxfreq = n.max()\n",
    "if maxfreq % 10:\n",
    "    y_top = np.ceil(maxfreq / 10) * 10\n",
    "else:\n",
    "    y_top = maxfreq + 10\n",
    "ax.set_ylim(top=y_top)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "cf4577ff-9a0b-4cde-b149-65b16338b3bb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "9.0"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Median number of texts per sample (the 0.5 quantile of no_note).\n",
    "np.median(np.asarray(no_note))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "134cd8c3-80dc-4c1f-a55a-31afdbe5cb43",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Load the channel metadata and the discretizer configuration. The `with`\n",
    "# blocks close each file as soon as it has been parsed.\n",
    "with open('channel_info.json') as channel_info_file:\n",
    "    channel_info = json.load(channel_info_file)\n",
    "with open('discretizer_config.json') as dis_config_file:\n",
    "    dis_config = json.load(dis_config_file)\n",
    "channel_name = dis_config['id_to_channel']\n",
    "is_catg = dis_config['is_categorical_channel']\n",
    "\n",
    "# NOTE(review): `X` is not defined in any cell visible in this notebook; it is\n",
    "# presumably a collection of samples, each a sequence of rows shaped like\n",
    "# [time, channel values...] -- confirm where it comes from.\n",
    "# NOTE(review): tt / features_list / features_mask_list are re-created for every\n",
    "# sample and never stored, so only the last sample's values survive the loop.\n",
    "for x in X:\n",
    "    tt = []                  # first column of each row, as a float rounded to 2 decimals\n",
    "    features_list = []       # one list of channel values per row\n",
    "    features_mask_list = []  # matching flags: 1 where the raw value was non-empty, else 0\n",
    "    for feature in x:\n",
    "        f_list_per_t = []\n",
    "        f_mask_per_t = []\n",
    "        for f_idx, val in enumerate(feature):\n",
    "            if f_idx == 0:\n",
    "                tt.append(round(float(val), 2))\n",
    "                continue\n",
    "            # Column k (k >= 1) belongs to channel_name[k - 1].\n",
    "            head = channel_name[f_idx - 1]\n",
    "            if val == '':\n",
    "                # Empty entry: store 0 as a placeholder and flag it as missing.\n",
    "                f_list_per_t.append(0)\n",
    "                f_mask_per_t.append(0)\n",
    "                continue\n",
    "            f_mask_per_t.append(1)\n",
    "            if is_catg[head]:\n",
    "                # Categorical channels are translated through the channel's value table.\n",
    "                val = channel_info[head]['values'][val]\n",
    "            f_list_per_t.append(float(round(float(val), 2)))\n",
    "        features_list.append(f_list_per_t)\n",
    "        features_mask_list.append(f_mask_per_t)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "8ad9d84f-16b2-4b59-a77a-9aad79efbf77",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    " \n",
    "# returns JSON object as\n",
    "# a dictionary\n",
    "\n",
    " \n",
    "# # Iterating through the json\n",
    "# # list\n",
    "# for i in data['emp_details']:\n",
    "#     print(i)\n",
    " \n",
    "# # Closing file\n",
    "# f.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "b4d2dbb7-afdd-49b2-9dcc-ae4cbd13964c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'None': 0, '1 No Response': 1, '2 To pain': 2, 'To Pain': 2, '3 To speech': 3, 'To Speech': 3, '4 Spontaneously': 4, 'Spontaneously': 4}\n",
      "1\n"
     ]
    }
   ],
   "source": [
    "\n",
    "# Show the value table of one channel and look up a single entry in it.\n",
    "# NOTE(review): relies on `head` left over from the feature-parsing loop, so the\n",
    "# channel shown depends on which cells were run before this one.\n",
    "print(channel_info[head]['values'])\n",
    "val=channel_info[head]['values']['1 No Response']\n",
    "print(val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "7c6c63a7-f60c-4f77-b810-9cdfe01ecfbc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Capillary refill rate': True,\n",
       " 'Diastolic blood pressure': False,\n",
       " 'Fraction inspired oxygen': False,\n",
       " 'Glascow coma scale eye opening': True,\n",
       " 'Glascow coma scale motor response': True,\n",
       " 'Glascow coma scale total': True,\n",
       " 'Glascow coma scale verbal response': True,\n",
       " 'Glucose': False,\n",
       " 'Heart Rate': False,\n",
       " 'Height': False,\n",
       " 'Mean blood pressure': False,\n",
       " 'Oxygen saturation': False,\n",
       " 'Respiratory rate': False,\n",
       " 'Systolic blood pressure': False,\n",
       " 'Temperature': False,\n",
       " 'Weight': False,\n",
       " 'pH': False}"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show which channels the discretizer config marks as categorical (True) or not (False).\n",
    "dis_config['is_categorical_channel']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "24db5524-b8d4-4d6d-ae17-2fdc66479459",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): 76 pasted values with no recorded provenance; confirm what they\n",
    "# represent and assign them to a descriptive name. `np.array` is used because a\n",
    "# bare `array` is not imported anywhere in this notebook.\n",
    "np.array([9.97581909e-01, 2.41809141e-03, 6.13446788e+01, 2.91782989e-01,\n",
    "       2.35905819e-02, 9.61617056e-02, 7.64721409e-02, 3.97438299e-01,\n",
    "       4.87705197e-02, 7.66722294e-02, 2.46534637e-01, 3.43598869e-02,\n",
    "       4.07939400e-02, 2.52735963e-03, 2.07609609e-03, 2.77129737e-02,\n",
    "       3.33537679e-02, 4.35256454e-02, 2.41723997e-02, 2.95404491e-01,\n",
    "       2.04061939e-03, 4.53922871e-01, 7.19268669e-02, 2.54296937e-03,\n",
    "       3.75315033e-02, 5.17420475e-02, 1.38543582e-02, 5.76992939e-03,\n",
    "       7.14500602e-01, 4.15545603e-02, 3.61848080e-02, 3.05099562e-03,\n",
    "       3.87121677e-03, 2.45356810e-02, 1.85443771e-02, 2.31563472e-02,\n",
    "       2.57035738e-02, 1.00810571e-02, 1.34314761e-02, 3.71228118e-02,\n",
    "       2.50607361e-03, 2.08155950e-01, 1.26431839e-01, 3.33472402e-01,\n",
    "       7.36921872e-03, 2.05979100e-01, 4.29367323e-02, 9.31192244e-03,\n",
    "       3.20141680e-03, 1.37544028e+02, 8.61442336e+01, 1.69770706e+02,\n",
    "       7.86865227e+01, 9.82986876e+01, 1.92625636e+01, 1.20336360e+02,\n",
    "       3.69188357e+01, 8.18668639e+01, 7.22800334e+00, 3.40860069e-03,\n",
    "       9.04012840e-01, 6.26050110e-02, 3.09781066e-01, 3.08462752e-01,\n",
    "       1.82280669e-01, 3.08698318e-01, 2.59389971e-01, 9.24627637e-01,\n",
    "       3.96771337e-03, 9.00331778e-01, 8.92981007e-01, 9.10188622e-01,\n",
    "       9.04160423e-01, 3.25268771e-01, 3.11031265e-02, 1.30764253e-01])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "39f60508-783e-4b05-9ce1-90f9778b041a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): 76 pasted values with no recorded provenance; confirm what they\n",
    "# represent and assign them to a descriptive name. The paste ended in `])` with no\n",
    "# matching `(`, so it is wrapped in np.array( here -- presumably the lost prefix.\n",
    "np.array([9.97498075e-01, 2.50192456e-03, 6.11747474e+01, 2.91638191e-01,\n",
    "       2.31682742e-02, 9.60736200e-02, 7.75653217e-02, 3.96644195e-01,\n",
    "       4.95601820e-02, 7.61332247e-02, 2.45887674e-01, 3.49675089e-02,\n",
    "       4.16907236e-02, 3.06231037e-03, 1.81276321e-03, 2.81891047e-02,\n",
    "       3.38637187e-02, 4.33959381e-02, 2.40329099e-02, 2.94659636e-01,\n",
    "       2.02786080e-03, 4.52361545e-01, 7.24256215e-02, 2.47786759e-03,\n",
    "       3.73746207e-02, 5.15937033e-02, 1.42247430e-02, 5.73687905e-03,\n",
    "       7.12386225e-01, 4.13270389e-02, 3.69642372e-02, 3.43448580e-03,\n",
    "       3.99487162e-03, 2.46555608e-02, 1.88790586e-02, 2.35376194e-02,\n",
    "       2.58909568e-02, 9.88033782e-03, 1.33233143e-02, 3.67208373e-02,\n",
    "       2.56985011e-03, 2.08957399e-01, 1.25510857e-01, 3.31938029e-01,\n",
    "       7.15057963e-03, 2.07859270e-01, 4.31468777e-02, 9.50533216e-03,\n",
    "       3.43731603e-03, 1.37495478e+02, 8.62587068e+01, 1.69793005e+02,\n",
    "       7.88177433e+01, 9.82863168e+01, 1.92775957e+01, 1.20316522e+02,\n",
    "       3.69323905e+01, 8.19019827e+01, 7.22785885e+00, 3.34391840e-03,\n",
    "       9.04332518e-01, 6.25721709e-02, 3.10136757e-01, 3.08789567e-01,\n",
    "       1.82442376e-01, 3.09074005e-01, 2.59179856e-01, 9.25080945e-01,\n",
    "       3.94817280e-03, 9.00613594e-01, 8.93579054e-01, 9.11381210e-01,\n",
    "       9.04491011e-01, 3.26240773e-01, 3.10193361e-02, 1.31090658e-01])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "cn",
   "language": "python",
   "name": "cn"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
